#------------------------------------------------------------
# setup
#------------------------------------------------------------
# bind the magrittr pipe under its usual name
`%>%` <- magrittr::`%>%`
# set the console print width to 160 characters
options(width = 160)
library(mlogit)
library(xtable)
library(dplyr)
library(ggplot2)
library(RColorBrewer)

# load roll call data (presumably the source of `rcv_votes`, which is read
# further down - confirm against the file's contents)
load("./original_data/roll_call_votes.RData")

# load the roll call meta data (presumably the source of `rcv_meta`)
load("./original_data/rcv_meta.RData")

# load the party groups data (presumably the source of `mep_groups`)
load("./original_data/mep_groups.RData")

# load national parties data (presumably the source of
# `meps_national_parties`)
load("./original_data/meps_national_parties.RData")

# remove roll calls that are not matched via vote_id (those are mostly agenda
# votes); this runs before the remaining files are loaded
rcv_meta <- rcv_meta %>%
  filter(!is.na(vote_id))

# load legislative observatory meta data
load("./original_data/oeils.RData")

# load votes data (presumably the source of `votes`)
load("./original_data/votes.RData")

# vote levels coded as part votes (whole_vote = 0)
part_votes <- c(
  "amended proposal", "amendment", "annex", "article", "block vote",
  "citation", "commission proposal", "council draft", "paragraph", "point",
  "proposal for rejection", "recital", "recommendation",
  "refer back to committee", "request for consultation", "request for vote",
  "subheading"
)

# vote levels coded as whole votes (whole_vote = 1)
whole_votes <- c(
  "agenda", "approbation", "approval procedure", "budget",
  "consent procedure", "council implementing decision", "final vote",
  "interpretation of rules of procedure", "joint text", "single vote", "visa"
)

# map a vector of vote levels to 0 (part vote) or 1 (whole vote); levels that
# appear in neither list come back as NA
flag_whole_vote <- function(level) {
  case_when(
    level %in% part_votes ~ 0,
    level %in% whole_votes ~ 1
  )
}

# part-vote / whole-vote indicator on the votes data, with a frequency check
votes <- mutate(votes, whole_vote = flag_whole_vote(vote_level))
table(votes$whole_vote)

# same indicator on the roll call meta data, based on the roll call type
rcv_meta <- mutate(rcv_meta, whole_vote = flag_whole_vote(rcv_type))

# derive an item id by cutting the trailing "_<digits>" off the vote_id, place
# it directly in front of vote_id, and keep only roll calls that have both an
# OEIL id and subject codes
rcv_meta <- rcv_meta %>%
  dplyr::mutate(
    vote_item_id = stringr::str_replace(vote_id, "_\\d+$", "")
  ) %>%
  dplyr::relocate(vote_item_id, .before = vote_id) %>%
  dplyr::filter(!is.na(oeil_id), !is.na(subject))

#------------------------------------------------------------
# roll calls
#------------------------------------------------------------

# roll call votes as a tibble, with two party group labels replaced by the
# names used in the rest of the script ("Greens", "The Left")
rcvs <- tibble::as_tibble(rcv_votes)
rcvs <- mutate(
  rcvs,
  party_group = case_when(
    party_group == "GUE/NGL" ~ "The Left",
    party_group == "Verts/ALE" ~ "Greens",
    TRUE ~ party_group
  )
)

# short codes for three groups identified by their long name; every other
# group keeps the code it already has in `canonical`
renamed_groups <- c(
  "Europe of Sovereign Nations Group" = "ESN",
  "Identity and Democracy Group" = "ID",
  "Patriots for Europe Group" = "PfE"
)
mep_groups <- mep_groups %>%
  mutate(
    canonical = coalesce(
      # NA wherever the long name is not one of the three above
      unname(renamed_groups[as.character(group_long)]),
      canonical
    )
  )

# ESN group members in EP 10
esn_members <- mep_groups %>%
  filter(ep == 10, canonical == "ESN")

# national party records of those members
esn_nat <- filter(meps_national_parties, mepid %in% esn_members$mepid)

# distinct national party names among them, leaving out "Independent"
esn_party_names <- esn_nat %>%
  distinct(party) %>%
  filter(party != "Independent")

# ids of MEPs in the roll call data whose national party is one of the ESN
# national parties
party_based_ids <- meps_national_parties %>%
  filter(
    mepid %in% rcvs$mepid,
    party %in% esn_party_names$party
  ) %>%
  distinct(mepid) %>%
  pull(mepid)

# ids of MEPs recorded in the ESN group (no restriction on `ep` is applied
# here, unlike for esn_members)
esn_based_ids <- mep_groups %>%
  dplyr::filter(canonical == "ESN") %>%
  dplyr::distinct(mepid) %>%
  dplyr::pull(mepid)

# an MEP counts as ESN if either criterion applies
esn_ids <- union(party_based_ids, esn_based_ids)

# final party group labels in the roll call data; conditions are checked in
# order, so MEPs in esn_ids become "ESN" first and only the remaining "ID"
# rows are re-labelled "PfE"; "PPE" is re-labelled "EPP"
rcvs <- mutate(
  rcvs,
  party_group = case_when(
    mepid %in% esn_ids ~ "ESN",
    party_group == "ID" ~ "PfE",
    party_group == "PPE" ~ "EPP",
    TRUE ~ party_group
  )
)

# party line per roll call and party group (non-attached "NI" excluded), plus
# the share of the group's rows that voted with that line
df <- rcvs %>%
  filter(party_group != "NI") %>%
  group_by(rcv_id, party_group) %>%
  # one row per roll call and group with the raw counts
  summarise(
    n_yes = sum(vote == "+", na.rm = TRUE),
    n_no = sum(vote == "-", na.rm = TRUE),
    n_abstain = sum(vote == "0", na.rm = TRUE),
    # counts rows whose vote is literally "split"; the codes compared above
    # are "+", "-" and "0", so this is zero unless the raw data has such rows
    n_split = sum(vote == "split", na.rm = TRUE),
    # all rows of the group, including those with a missing vote
    n_rows = n(),
    .groups = "drop"
  ) %>%
  mutate(
    # ties between yes/no and abstain go to yes/no; a yes/no tie that is not
    # outnumbered by abstentions is a "split"
    majority = case_when(
      n_yes > n_no & n_yes >= n_abstain ~ "+",
      n_no > n_yes & n_no >= n_abstain ~ "-",
      n_abstain > n_yes & n_abstain > n_no ~ "0",
      n_yes == n_no & n_yes >= n_abstain ~ "split"
    ),
    prop = case_when(
      majority == "+" ~ n_yes / n_rows,
      majority == "-" ~ n_no / n_rows,
      majority == "0" ~ n_abstain / n_rows,
      majority == "split" ~ n_split / n_rows
    )
  ) %>%
  select(rcv_id, party_group, majority, prop)

# frequency of each party line by party group
table(df$party_group, df$majority)

# one row per roll call, one majority_* and one prop_* column per group
df_wide <- tidyr::pivot_wider(
  df,
  names_from = party_group,
  values_from = c(majority, prop)
)
df_wide

# a group without any row for a roll call has NA after widening; label those
# cells "did not vote" in every character column
df_wide <- df_wide %>%
  mutate(
    across(where(is.character), ~ coalesce(., "did not vote"))
  )

# back to long format, now including "did not vote"; the majority and prop
# columns are lengthened separately and joined afterwards
df_long_majority <- df_wide %>%
  select(rcv_id, starts_with("majority")) %>%
  tidyr::pivot_longer(
    cols = -rcv_id,
    names_prefix = "majority_",
    names_to = "party_group",
    values_to = "majority"
  )

df_long_prop <- df_wide %>%
  select(rcv_id, starts_with("prop")) %>%
  tidyr::pivot_longer(
    cols = -rcv_id,
    names_prefix = "prop_",
    names_to = "party_group",
    values_to = "prop"
  )

df_long <- left_join(
  df_long_majority,
  df_long_prop,
  by = c("rcv_id", "party_group")
)

# voting frequencies including "did not vote"
table(df_long$party_group, df_long$majority)

# roll calls on which every group has the same party line as ESN are
# uncompetitive
other_group_majorities <- c(
  "majority_PfE", "majority_ECR", "majority_EPP", "majority_Renew",
  "majority_S&D", "majority_Greens", "majority_The Left"
)
uncompetitive_ids <- df_wide %>%
  filter(
    if_all(all_of(other_group_majorities), ~ . == majority_ESN)
  ) %>%
  pull(rcv_id)

# drop the uncompetitive roll calls from the wide data ...
df_wide <- filter(df_wide, !rcv_id %in% uncompetitive_ids)

# ... and from the meta data as well
rcv_meta <- filter(rcv_meta, !rcv_id %in% uncompetitive_ids)

# possible right coalitions, each a subset of the rows of df_wide
right_list <- list(
  # Grand Right: EPP, ECR, PfE and ESN share a line that differs from the
  # line of both Renew and S&D
  "Grand Right" = df_wide %>%
    filter(
      majority_ECR == majority_EPP,
      majority_ESN == majority_EPP,
      majority_PfE == majority_EPP,
      majority_Renew != majority_EPP,
      `majority_S&D` != majority_EPP
    ),

  # Far Right: ECR, PfE and ESN share a line that differs from the line of
  # EPP, Renew and S&D
  "Far Right" = df_wide %>%
    filter(
      majority_PfE == majority_ECR,
      majority_ESN == majority_ECR,
      majority_EPP != majority_ECR,
      majority_Renew != majority_ECR,
      `majority_S&D` != majority_ECR
    ),

  # Center Right+: EPP differs from both Renew and S&D and shares its line
  # with one or two - but not all three - of ECR, PfE and ESN
  "Center Right+" = df_wide %>%
    filter(
      majority_EPP != majority_Renew,
      majority_EPP != `majority_S&D`,
      # at least one of the three votes with EPP ...
      majority_EPP == majority_ECR |
        majority_EPP == majority_PfE |
        majority_EPP == majority_ESN,
      # ... but not all of them (that case is the Grand Right)
      !(
        majority_EPP == majority_ECR &
          majority_EPP == majority_PfE &
          majority_EPP == majority_ESN
      )
    )
)

# split the subject string of every roll call once; entries are separated by
# "; " and element i of the list belongs to row i of rcv_meta
subject_parts <- stringr::str_split(rcv_meta$subject, "; ")

# subject codes grouped by vote: one row per roll call and subject entry,
# keeping only entries that start with a digit (rows where the entry is NA
# are dropped by the filter as well)
all_topics_by_vote <- tibble::tibble(
  vote_id = rep(rcv_meta$rcv_id, times = lengths(subject_parts)),
  # as.character() keeps the column present when there is nothing to unlist
  subject = as.character(unlist(subject_parts))
) %>%
  dplyr::filter(stringr::str_detect(subject, "^\\d"))

# all subject entries in the data, in row order and with repetitions
all_topics <- all_topics_by_vote$subject

# number of unique topics and total number of topic entries
length(unique(all_topics))
length(all_topics)

# get the topics data; the sourced file is expected to define
# util_generate_topics() - the result is used below with the columns
# `original`, `label` and `category`
source("generate_topics.r")
topics <- util_generate_topics(all_topics) %>%
  dplyr::filter(label != "Ambiguous")

# merge topics into all_topics_by_vote (subjects without a retained topic get
# NA in the joined columns)
all_topics_by_vote <- all_topics_by_vote %>%
  left_join(topics, by = c("subject" = "original"))

# output omits the categories to save space - table for the appendix
topics_output <- topics %>%
  dplyr::select(-category) %>%
  dplyr::rename(
    "Assigned Topic" = label,
    "Subject Code" = original
  ) %>%
  dplyr::arrange(`Assigned Topic`)

# latex
topics_xtable <- xtable(
  topics_output,
  caption = "Assigned Topics",
  label = "tab:assigned_topics"
)

# extra vertical space after every fifth row; computed from the number of
# complete groups of five so that a table with fewer than five rows yields no
# positions instead of an error from seq()
linespace_rows <- seq_len(nrow(topics_output) %/% 5) * 5
add_linespace <- if (length(linespace_rows) > 0) {
  list(
    pos = as.list(linespace_rows),
    command = rep("\\addlinespace\n", length(linespace_rows))
  )
} else {
  # NULL is the "nothing to add" value of print.xtable's add.to.row
  NULL
}
print(
  topics_xtable,
  file = "./tables/table_appendix_0_1_topic_mapping.tex",
  caption.placement = "top",
  include.rownames = FALSE,
  add.to.row = add_linespace
)

# table of topic totals: for every assigned topic, how often one of its
# subject codes occurs in all_topics (a roll call with several matching codes
# counts several times), together with the topic's category
topic_labels <- unique(topics$label)
topic_totals <- lapply(topic_labels, function(current_label) {

  # rows of the mapping that belong to the current topic
  members <- dplyr::filter(topics, label == current_label)

  tibble::tibble(
    topic = current_label,
    # occurrences of any of the topic's fine-grained subject codes
    freq = sum(all_topics %in% members$original),
    category = unique(members$category)
  )
}) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(dplyr::desc(freq))

# topic_totals counts subject-code occurrences; this table instead counts, for
# every assigned topic, the number of distinct roll calls that carry it
labels_in_data <- unique(topics$label)
vote_level_totals <- lapply(labels_in_data, function(current_label) {

  # distinct roll calls with at least one subject mapped to the current topic
  votes_with_label <- all_topics_by_vote %>%
    dplyr::filter(label %in% current_label) %>%
    dplyr::distinct(vote_id)

  tibble::tibble(
    topic = current_label,
    total = nrow(votes_with_label)
  )
}) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(dplyr::desc(total))

# for every right coalition: aggregate shares and per-topic vote counts;
# the result is an unnamed list with one list(topics, aggregates) per
# coalition, in the order of right_list
right_out <- lapply(unname(right_list), function(coalition) {

  # meta data of the coalition's roll calls that have an OEIL id and subject
  right_meta <- rcv_meta %>%
    dplyr::filter(
      rcv_id %in% coalition$rcv_id,
      !is.na(oeil_id),
      !is.na(subject)
    )

  # number of votes that unite this coalition
  coalition_n <- nrow(right_meta)

  # coalition size, its share of all roll calls in rcv_meta, and the shares
  # of legislative votes and of whole votes within the coalition (percent)
  shares_out <- tibble(
    coalition_n = coalition_n,
    coalition_share = coalition_n / nrow(rcv_meta) * 100,
    share_leg = nrow(dplyr::filter(right_meta, legislative == 1)) /
      coalition_n * 100,
    share_whole = nrow(dplyr::filter(right_meta, whole_vote == 1)) /
      coalition_n * 100
  )

  # subject codes grouped by vote: split all subject strings at once and
  # keep the entries that start with a digit
  subject_parts <- stringr::str_split(right_meta$subject, "; ")
  right_topics_by_vote <- tibble::tibble(
    vote_id = rep(right_meta$rcv_id, times = lengths(subject_parts)),
    # as.character() keeps the column present when there is nothing to unlist
    subject = as.character(unlist(subject_parts))
  ) %>%
    dplyr::filter(stringr::str_detect(subject, "^\\d"))

  # assign the labels to the coalition's distinct subjects
  right_labels <- util_generate_topics(
    unique(right_topics_by_vote$subject)
  ) %>%
    dplyr::filter(label != "Ambiguous")

  # merge the labels into right_topics_by_vote
  right_topics_by_vote <- right_topics_by_vote %>%
    left_join(right_labels, by = c("subject" = "original"))

  # number of distinct coalition roll calls per topic, largest first; built
  # as one tibble so that a coalition without any labelled topic gives an
  # empty table rather than an error
  coalition_labels <- unique(right_labels$label)
  votes_per_label <- vapply(
    coalition_labels,
    function(current_label) {
      on_topic <- right_topics_by_vote$label %in% current_label
      length(unique(right_topics_by_vote$vote_id[on_topic]))
    },
    integer(1),
    USE.NAMES = FALSE
  )
  vote_level_totals <- tibble::tibble(
    topic = coalition_labels,
    total = votes_per_label
  ) %>%
    dplyr::arrange(dplyr::desc(total)) %>%
    # each topic's percentage of the summed topic counts; a roll call with
    # several topics enters that sum once per topic
    dplyr::mutate(
      share_of_coalition_n = total / sum(total) * 100
    )

  list(topics = vote_level_totals, aggregates = shares_out)
})

# check that no roll call belongs to more than one coalition: TRUE when every
# pair of coalitions has disjoint rcv_ids
coalition_pairs <- utils::combn(seq_along(right_list), 2, simplify = FALSE)
all(
  vapply(
    coalition_pairs,
    function(pair) {
      !any(right_list[[pair[1]]]$rcv_id %in% right_list[[pair[2]]]$rcv_id)
    },
    logical(1)
  )
)

# aggregate shares for the right coalitions (one tibble per coalition)
right_aggs <- lapply(right_out, function(res) res[["aggregates"]])

# roll calls that belong to none of the coalitions, with the same aggregates
coalition_rcv_ids <- unlist(
  lapply(right_list, function(coalition) coalition$rcv_id),
  use.names = FALSE
)
other_meta <- rcv_meta %>%
  filter(!rcv_id %in% coalition_rcv_ids)
other_aggs <- tibble(
  coalition_n = nrow(other_meta),
  coalition_share = nrow(other_meta) / nrow(rcv_meta) * 100,
  share_leg = nrow(other_meta %>% filter(legislative == 1)) /
    nrow(other_meta) * 100,
  share_whole = nrow(other_meta %>% filter(whole_vote == 1)) /
    nrow(other_meta) * 100
)

# descriptive table of coalition proportions: one row per coalition plus
# "Other"; the vote count is printed as a whole number, the three percentage
# columns with two decimals
coalition_aggs <- dplyr::bind_rows(c(right_aggs, list(other_aggs)))
format_share <- function(x) formatC(x, format = "f", digits = 2)
desc_table <- cbind(
  c("Grand Right", "Far Right", "Center Right+", "Other"),
  formatC(coalition_aggs$coalition_n, format = "d"),
  format_share(coalition_aggs$coalition_share),
  format_share(coalition_aggs$share_leg),
  format_share(coalition_aggs$share_whole)
)
colnames(desc_table) <- c(
  "", "N", "Share", "Legislative", "Final Passage"
)

# save coalition shares table; row numbers are suppressed because the first
# column already names the rows (as in the other tables of this script)
xtable(
  desc_table,
  caption = "Right Coalitions across Vote Types",
  label = "tab:right_coalitions"
) %>%
  print(
    file = "./tables/table_1.tex",
    caption.placement = "top",
    include.rownames = FALSE
  )

# per-topic vote counts of the right coalitions (one tibble per coalition)
right_freqs <- lapply(right_out, function(res) res[["topics"]])

# number of roll calls on a topic in the whole data; renamed so that it can
# sit next to the coalition-level `total` after the join
vote_level_totals <- dplyr::rename(vote_level_totals, grand_total = total)

# for one coalition: attach the overall count per topic, express the
# coalition's count as a percentage of it, sort by that percentage (largest
# first) and return topic, total, grand_total and the percentage formatted
# as text with one decimal and a "%" sign
coalition_topic_shares <- function(coalition_freqs, topic_totals) {
  coalition_freqs %>%
    dplyr::left_join(topic_totals, by = "topic") %>%
    dplyr::mutate(share_of_grand_total = total / grand_total * 100) %>%
    dplyr::arrange(dplyr::desc(share_of_grand_total)) %>%
    dplyr::select(topic, total, grand_total, share_of_grand_total) %>%
    dplyr::mutate(
      share_of_grand_total = sprintf("%.1f%%", share_of_grand_total)
    )
}

# Grand Right
grand_right <- coalition_topic_shares(right_freqs[[1]], vote_level_totals)

# Far Right
far_right <- coalition_topic_shares(right_freqs[[2]], vote_level_totals)

# Center Right+
center_right_plus <- coalition_topic_shares(
  right_freqs[[3]], vote_level_totals
)

# top topics with coalition potential across all right coalitions: one row per
# topic, one column per coalition holding the coalition's share of all roll
# calls on that topic (percent, as rounded to one decimal in the coalition
# tables), plus the sum of the three shares; sorted by that sum
top_topics <- bind_rows(
  mutate(grand_right, coalition = "Grand Right"),
  mutate(far_right, coalition = "Far Right"),
  mutate(center_right_plus, coalition = "Center Right+")
) %>%
  # the share arrives as text such as "12.3%"; strip the sign and convert
  dplyr::mutate(
    share = as.numeric(
      stringr::str_remove(string = share_of_grand_total, pattern = "%")
    )
  ) %>%
  dplyr::select(topic, coalition, share) %>%
  # wide format
  tidyr::pivot_wider(
    names_from = coalition,
    values_from = share
  ) %>%
  # a topic that does not occur in a coalition has a share of zero
  dplyr::mutate(
    dplyr::across(dplyr::where(is.numeric), ~ dplyr::coalesce(., 0))
  ) %>%
  # `+` is vectorised, so no row-wise evaluation is needed and the result
  # stays a plain tibble
  dplyr::mutate(
    total = `Grand Right` + `Far Right` + `Center Right+`
  ) %>%
  dplyr::arrange(dplyr::desc(total))

top_topics_xtable <- xtable(
  top_topics,
  caption = "Topics that Unite the Right",
  label = "tab:top_topics"
)
print(
  top_topics_xtable,
  file = "./tables/table_2.tex",
  caption.placement = "top",
  include.rownames = FALSE
)
